{
    "cells": [
        {
            "attachments": {},
            "cell_type": "markdown",
            "id": "cfb64210-9c6b-47d7-81f4-67dbdab68e4c",
            "metadata": {
                "tags": []
            },
            "source": [
                "# Composable Graph Basic"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "41927486",
            "metadata": {},
            "outputs": [],
            "source": [
                "# NOTE: This is ONLY necessary in a Jupyter notebook.\n",
                "# Details: Jupyter runs an event loop behind the scenes.\n",
                "#          Starting another event loop to make async queries therefore nests event loops.\n",
                "#          This is normally not allowed, so we use nest_asyncio to permit it for convenience.\n",
                "import nest_asyncio\n",
                "\n",
                "nest_asyncio.apply()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "fa0e62b6",
            "metadata": {},
            "outputs": [],
            "source": [
                "import logging\n",
                "import sys\n",
                "\n",
                "# Send INFO-level logs to stdout through a single root handler.\n",
                "# force=True replaces any handlers already installed on the root logger, so the level is\n",
                "# always applied and each record is printed once (basicConfig followed by an extra\n",
                "# addHandler(StreamHandler) attached two stdout handlers and duplicated every message).\n",
                "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "e27b0473-4bda-47f0-b6ed-fd482eac1a13",
            "metadata": {},
            "outputs": [],
            "source": [
                "from llama_index import (\n",
                "    VectorStoreIndex,\n",
                "    EmptyIndex,\n",
                "    TreeIndex,\n",
                "    SummaryIndex,\n",
                "    SimpleDirectoryReader,\n",
                "    ServiceContext,\n",
                "    StorageContext,\n",
                ")"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "49e0d841-680f-4a0c-b455-788b54978ebf",
            "metadata": {},
            "source": [
                "### Load Datasets\n",
                "\n",
                "Load Paul Graham's essay."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "ddff8f98-e002-40c5-93ac-93aa40dca5ca",
            "metadata": {},
            "outputs": [],
            "source": [
                "# load PG's essay\n",
                "essay_documents = SimpleDirectoryReader(\"../paul_graham_essay/data/\").load_data()"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "f1782198-c0de-4679-8951-1297c21b8639",
            "metadata": {
                "tags": []
            },
            "source": [
                "### Building the document indices\n",
                "- Build a vector index for Paul Graham's essay\n",
                "- Also build an empty index (it holds no documents, so queries against it are answered from the LLM's prior knowledge)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "8b5aad4a-49ef-4b24-962a-0793f4f09316",
            "metadata": {},
            "outputs": [],
            "source": [
                "# shared contexts, reused by both indices below\n",
                "service_context = ServiceContext.from_defaults(chunk_size=512)\n",
                "storage_context = StorageContext.from_defaults()\n",
                "\n",
                "# vector index over the essay documents\n",
                "essay_index = VectorStoreIndex.from_documents(\n",
                "    essay_documents,\n",
                "    storage_context=storage_context,\n",
                "    service_context=service_context,\n",
                ")\n",
                "\n",
                "# empty index: constructed without any documents\n",
                "empty_index = EmptyIndex(\n",
                "    storage_context=storage_context,\n",
                "    service_context=service_context,\n",
                ")"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "4ee2ed80-fa2a-477b-835c-464c6fc1d973",
            "metadata": {},
            "source": [
                "### Query Indices\n",
                "Query each index on its own and inspect the response."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "df22aada-bd3c-48e8-98dd-ec38691a6414",
            "metadata": {
                "scrolled": true,
                "tags": []
            },
            "outputs": [],
            "source": [
                "# query the essay index on its own\n",
                "query_engine = essay_index.as_query_engine(\n",
                "    response_mode=\"tree_summarize\", similarity_top_k=3\n",
                ")\n",
                "response = query_engine.query(\"Tell me about what Sam Altman did during his time in YC\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "718f0063-e41c-42da-a6f5-3cae90f7c6d3",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "print(str(response))"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "1b934abf-bb30-4d86-b0ba-3dc60666b798",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# query the empty index on its own\n",
                "query_engine = empty_index.as_query_engine(\n",
                "    response_mode=\"generation\",\n",
                ")\n",
                "response = query_engine.query(\"Tell me about what Sam Altman did during his time in YC\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "f677f144-549c-404f-aafb-5ce8fa295146",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "print(str(response))"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "ff521fbb",
            "metadata": {},
            "source": [
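                "Define a summary for each index. These summaries are passed to the graph as `index_summaries` when the indices are composed below."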
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "4149cbbd-7d0b-48c4-8c47-7d67ae0c55f0",
            "metadata": {},
            "outputs": [],
            "source": [
                "# one-sentence descriptions, later passed to the graph as index_summaries\n",
                "essay_index_summary = (\n",
                "    \"This document describes Paul Graham's life, \"\n",
                "    \"from early adulthood to the present day.\"\n",
                ")\n",
                "empty_index_summary = \"This can be used for general knowledge purposes.\""
            ]
        },
        {
            "cell_type": "markdown",
            "id": "eebbc448-1e0b-402c-b37e-f93bfcc0bf4f",
            "metadata": {},
            "source": [
                "### Define Graph (Summary Index as Parent Index)\n",
                "\n",
                "This allows us to synthesize responses using both a knowledge corpus and the LLM's prior knowledge."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "c0580ff9-ca0a-4ac1-93ef-b570903ea404",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "from llama_index.indices.composability import ComposableGraph"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "eb064bf2-77f5-4205-bd1e-ec7de40a6f7f",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# compose both indices under a SummaryIndex parent\n",
                "graph = ComposableGraph.from_indices(\n",
                "    SummaryIndex,\n",
                "    [essay_index, empty_index],\n",
                "    storage_context=storage_context,\n",
                "    service_context=service_context,\n",
                "    index_summaries=[essay_index_summary, empty_index_summary],\n",
                ")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "ae127943-afac-48b4-b22d-84a37e553e4b",
            "metadata": {},
            "outputs": [],
            "source": [
                "# keep the root index id; it is needed to look the graph up in storage again\n",
                "root_id = graph.root_id\n",
                "\n",
                "# [optional] persist to disk\n",
                "storage_context.persist()"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "dca2b64b-9af1-456f-8dab-822bfdc5d0ac",
            "metadata": {},
            "outputs": [],
            "source": [
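                "# [optional] load the graph back from storage\n",
                "# NOTE: this reuses the in-memory storage_context created above; to read what was persisted\n",
                "#       to disk, rebuild it first, e.g. StorageContext.from_defaults(persist_dir=...)\n",
                "#       (confirm the exact call against the llama_index storage docs).\n",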
                "from llama_index.indices.loading import load_graph_from_storage\n",
                "\n",
                "graph = load_graph_from_storage(storage_context, root_id=root_id)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "7a811f1a",
            "metadata": {},
            "outputs": [],
            "source": [
                "# per-index query engines: the essay index gets its own settings\n",
                "custom_query_engines = {\n",
                "    essay_index.index_id: essay_index.as_query_engine(\n",
                "        response_mode=\"tree_summarize\", similarity_top_k=3\n",
                "    ),\n",
                "}"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "f3c4e58b-b153-4e43-bc02-274a85babbe8",
            "metadata": {},
            "outputs": [],
            "source": [
                "# set the logging level to DEBUG for more detailed output\n",
                "# ask the graph a question about Sam Altman\n",
                "query_engine = graph.as_query_engine(\n",
                "    custom_query_engines=custom_query_engines,\n",
                ")\n",
                "response = query_engine.query(\"Tell me about what Sam Altman did during his time in YC\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "c0a43443-3e00-4e48-b3ab-f6369191d53a",
            "metadata": {},
            "outputs": [],
            "source": [
                "print(str(response))"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "c78bc3da-6bad-4998-9a81-90a3fa9200a9",
            "metadata": {},
            "outputs": [],
            "source": [
                "# Get source of response\n",
                "print(response.get_formatted_sources())"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "f437c6df-31b1-40d9-9b57-70f7e0318eb7",
            "metadata": {
                "tags": []
            },
            "source": [
                "### Define Graph (Tree Index as Parent Index)\n",
                "\n",
                "This allows us to \"route\" a query to either a knowledge-augmented index, or to the LLM itself."
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "d0c05040-0f6c-4e9d-bf08-4e5207ea2774",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "from llama_index.indices.composability import ComposableGraph"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "a6c1b887-9cb5-49db-a9c7-5cb348beff58",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# configure query engines\n",
                "custom_query_engines = {\n",
                "    essay_index.index_id: essay_index.as_query_engine(\n",
                "        response_mode=\"tree_summarize\", similarity_top_k=3\n",
                "    ),\n",
                "}"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "d5579f16-cee5-4287-b89e-635d161bdfb5",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# compose both indices under a TreeIndex parent\n",
                "# Pass the same service_context that the child indices were built with, so the parent\n",
                "# index is configured like them instead of relying on the library's defaults.\n",
                "graph2 = ComposableGraph.from_indices(\n",
                "    TreeIndex,\n",
                "    [essay_index, empty_index],\n",
                "    index_summaries=[essay_index_summary, empty_index_summary],\n",
                "    service_context=service_context,\n",
                ")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "c57d370f-59af-4a2d-8fc6-05cf93d958e5",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# set the logging level to DEBUG for more detailed output\n",
                "# ask it a question about Paul Graham growing up\n",
                "query_engine = graph2.as_query_engine(\n",
                "    custom_query_engines=custom_query_engines,\n",
                ")\n",
                "response = query_engine.query(\"Tell me about what Paul Graham did growing up?\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "1d99502a-ab3c-48da-bfb1-c54a95dadbb5",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# print the text so newlines render, rather than showing the quoted, escaped repr\n",
                "print(str(response))"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "997498a9-128d-4c0b-8826-c6d6871571f5",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "print(response.get_formatted_sources())"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "8dc10463-ca79-4b47-83d6-217bd186d822",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# a question the essay does not cover\n",
                "response = query_engine.query(\"Tell me about Barack Obama\")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "9b8411a0-f9a8-4f1b-a476-03e746ec8ab3",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# print the text so newlines render, rather than showing the quoted, escaped repr\n",
                "print(str(response))"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "749625a3-722c-4bf4-b4ef-55b00f20ef20",
            "metadata": {
                "tags": []
            },
            "outputs": [],
            "source": [
                "# print the formatted sources so they render as text rather than as an escaped repr\n",
                "print(response.get_formatted_sources())"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "78a616a3",
            "metadata": {},
            "outputs": [],
            "source": []
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3.11.0 ('llama')",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.9.16"
        },
        "vscode": {
            "interpreter": {
                "hash": "775fd5332502f2902173832d699e1edc37222ebadd0e97b5c8a1a7431bebae89"
            }
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}